import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Load the iris measurements from a local CSV file into a DataFrame.
csv_path = "C:\\Users\\AASHIMA\\Desktop\\Python\\irisflowe.csv"
data = pd.read_csv(csv_path)
# Preview the first rows to check the columns were parsed as expected.
data.head()
|  | sepal_length | sepal_width | petal_length | petal_width | species |
|---|---|---|---|---|---|
| 0 | 5.1 | 3.5 | 1.4 | 0.2 | Iris-setosa |
| 1 | 4.9 | 3.0 | 1.4 | 0.2 | Iris-setosa |
| 2 | 4.7 | 3.2 | 1.3 | 0.2 | Iris-setosa |
| 3 | 4.6 | 3.1 | 1.5 | 0.2 | Iris-setosa |
| 4 | 5.0 | 3.6 | 1.4 | 0.2 | Iris-setosa |
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
data.describe()
|  | sepal_length | sepal_width | petal_length | petal_width |
|---|---|---|---|---|
| count | 150.000000 | 150.000000 | 150.000000 | 150.000000 |
| mean | 5.843333 | 3.054000 | 3.758667 | 1.198667 |
| std | 0.828066 | 0.433594 | 1.764420 | 0.763161 |
| min | 4.300000 | 2.000000 | 1.000000 | 0.100000 |
| 25% | 5.100000 | 2.800000 | 1.600000 | 0.300000 |
| 50% | 5.800000 | 3.000000 | 4.350000 | 1.300000 |
| 75% | 6.400000 | 3.300000 | 5.100000 | 1.800000 |
| max | 7.900000 | 4.400000 | 6.900000 | 2.500000 |
# List the distinct class labels present in the species column.
data["species"].unique()
array(['Iris-setosa', 'Iris-versicolor', 'Iris-virginica'], dtype=object)
# Add a numeric encoding of the species label as a new "Target" column.
species_codes = {"Iris-setosa": 0, "Iris-versicolor": 1, "Iris-virginica": 2}
data["Target"] = data["species"].map(species_codes)

# Split the rows positionally into three consecutive chunks.
# NOTE(review): presumably one species per chunk (the file looks sorted by
# species, 50 rows each) -- confirm against the CSV.
df0 = data.iloc[:50]
df1 = data.iloc[50:100]
df2 = data.iloc[100:]
# Sepal length vs. sepal width for the first two row chunks:
# df0 as green "+" markers, df1 as blue "." markers.
# NOTE(review): df2 is not plotted here -- confirm that is intentional.
plt.xlabel("Sepal Length")  # label spelling fixed (was "Lenght")
plt.ylabel("Sepal Width")
plt.scatter(df0["sepal_length"], df0["sepal_width"], color="green", marker="+")
plt.scatter(df1["sepal_length"], df1["sepal_width"], color="blue", marker=".")
<matplotlib.collections.PathCollection at 0x1df6412b9d0>
# Petal length vs. petal width for the first two row chunks:
# df0 as green "+" markers, df1 as blue "." markers.
# NOTE(review): df2 is not plotted here -- confirm that is intentional.
plt.xlabel("Petal Length")  # label spelling fixed (was "Lenght")
plt.ylabel("Petal Width")
plt.scatter(df0["petal_length"], df0["petal_width"], color="green", marker="+")
plt.scatter(df1["petal_length"], df1["petal_width"], color="blue", marker=".")
<matplotlib.collections.PathCollection at 0x1df641a1350>
import plotly.express as px

# Interactive scatter of sepal width (x) against sepal length (y),
# with one colour per species.
fig = px.scatter(
    data,
    x="sepal_width",
    y="sepal_length",
    color="species",
)
fig.show()

# Same view for the petal measurements.
fig = px.scatter(
    data,
    x="petal_width",
    y="petal_length",
    color="species",
)
fig.show()
x = data.drop("species", axis=1)
y = data["species"]
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x, y,
test_size=0.2,
random_state=0)
from sklearn.neighbors import KNeighborsClassifier
knn = KNeighborsClassifier(n_neighbors=1)
knn.fit(x_train, y_train)
print(knn.score(x_test,y_test))
1.0
len(x_train)
120
len(x_test)
30
x_new = np.array([[20.0, 15.3, 11.5, 1.2,2.0]])
prediction = knn.predict(x_new)
print("Prediction: {}".format(prediction))
Prediction: ['Iris-virginica']
C:\Users\AASHIMA\anaconda3\Lib\site-packages\sklearn\base.py:439: UserWarning: X does not have valid feature names, but KNeighborsClassifier was fitted with feature names
x_new = np.array([[5, 2.9, 1, 0.2,0.5]])
prediction = knn.predict(x_new)
print("Prediction: {}".format(prediction))
Prediction: ['Iris-setosa']
C:\Users\AASHIMA\anaconda3\Lib\site-packages\sklearn\base.py:439: UserWarning: X does not have valid feature names, but KNeighborsClassifier was fitted with feature names
from sklearn.metrics import confusion_matrix

# Predict labels for the held-out rows and tabulate them against the true
# labels (rows: true class, columns: predicted class).
y_pred = knn.predict(x_test)
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
cm
array([[11, 0, 0],
[ 0, 13, 0],
[ 0, 0, 6]], dtype=int64)
import seaborn as sns

# Draw the confusion matrix as an annotated heatmap.
# Diagonal cells count correct predictions; every off-diagonal cell counts
# samples of the row's true class that were predicted as the column's class.
plt.figure(figsize=(7, 5))
sns.heatmap(cm, annot=True)
plt.xlabel("Predicted")  # label spelling fixed (was "Prerdicted")
plt.ylabel("Truth")
Text(58.222222222222214, 0.5, 'Truth')
from sklearn.metrics import classification_report

# Per-class precision, recall, F1 and support for the held-out predictions.
report = classification_report(y_test, y_pred)
print(report)
precision recall f1-score support
Iris-setosa 1.00 1.00 1.00 11
Iris-versicolor 1.00 1.00 1.00 13
Iris-virginica 1.00 1.00 1.00 6
accuracy 1.00 30
macro avg 1.00 1.00 1.00 30
weighted avg 1.00 1.00 1.00 30